import os
import pandas as pd

import sys

sys.path.append("..")
from settings import llm_industry_prompt
from utils import get_remain_data, load_obj, save_obj

# Input CSV, resolved relative to the current working directory.
file = "../data/氢.csv"
df = pd.read_csv(file)

# One entry per CSV row, produced by llm_industry_prompt from the
# "企业名称" (company name) and "经营范围" (business scope) columns.
# NOTE(review): llm_industry_prompt presumably returns a prompt string —
# confirm against settings.py.
industry_infos = [
    llm_industry_prompt(industry_name=record["企业名称"], domain=record["经营范围"])
    for _index, record in df.iterrows()
]


def valid_func(item):
    """Return True when ``item`` does not contain ``"error_code"``.

    The check is a plain membership test, so it works for any container
    supporting ``in`` (for a dict this tests keys, for a string it tests
    for the substring).
    NOTE(review): items are presumably response dicts where an
    "error_code" key marks a failed request — confirm against the caller.
    """
    return "error_code" not in item

# Gather the paths of all entries in the inference output directory whose
# names end with "pkl".
p = "objs/online_infer"
objs = [
    os.path.join(p, entry) for entry in os.listdir(p) if entry.endswith("pkl")
]

# Load every saved object, then let get_remain_data split the work using the
# de-duplicated prompts, the validity predicate and the loaded results.
# NOTE(review): presumably remain_text holds prompts still lacking a valid
# result and valid_data the usable results — confirm against utils.py.
datas = [load_obj(path) for path in objs]
remain_text, valid_data = get_remain_data(set(industry_infos), valid_func, datas)

# When nothing is reported as remaining, fall back to the full prompt list.
if len(remain_text) == 0:
    remain_text = industry_infos